# Replication Package for
# "Bias and Overconfidence in Parametric Models of Interactive Processes"
# by William Berry, Jacqueline H. R. Demeritt, and Justin Esarey
#
# Dichotomous independent variable analysis
# 
# Package prepared by J. Esarey on 3/25/2015


# NEEDED PACKAGES
# in R: foreign, locfit, ROCR, mvtnorm, snow, rlecuyer, sfsmisc, VGAM
# in Stata: mat2txt and hl (latter from http://www.homepages.ucl.ac.uk/~ucakgam/stata/)

# uncomment next line if packages need to be installed
# install.packages(c("locfit", "ROCR", "mvtnorm", "snow", "rlecuyer", "sfsmisc", "VGAM"), dep=T)

# This replication file exists in four parts:

# (1) generate monte carlo data sets from 155 sample DGPs
# (2) estimate models on the monte carlo data sets & save results (takes a long time)
# (3) conduct summary analysis on model results
# (4) conduct size/power analysis for product term probit & logit models


rm(list=ls())
# set directory to the location of this replication file
setwd("J:/Dropbox/jesarey_documents/BRE Logit Misspecification Paper/Final Replication Package")



###############################################################
# Part One: generate Monte Carlo Data Sets
# if data has not already been drawn, uncomment line
# to create the Monte Carlo data sets
###############################################################
# source(file="draw_mc_data.r")



###############################################################
# Part Two: estimate models on MC data sets and save results
# note: this is very time consuming!
###############################################################

rm(list=ls())
set.seed(123456)


# Set number of samples to draw
reps<-25

# Load necessary libraries
require(foreign) 
require(locfit)
require(ROCR)
require(mvtnorm)
require(snow)
require(sfsmisc)
require(VGAM)

# start up cluster
NODES<-12 #number of cluster nodes to establish
cl <- makeCluster(rep("localhost", NODES), type = "SOCK")
clusterSetupRNG (cl, type = "RNGstream", seed=rep(398123, 6))

invisible(clusterEvalQ(cl, library(VGAM)))


# Load functions to report true Probability y=1

trupry<-function(r,X,Z){
  
  if(r==85) {
    trupry1 = (((X -1)^2 / -10) + .9) + ((((Z + .75)^2 / 2.5) -.225) *((((X -1.250000075)^2 / -3.750000375) + 0.516666675) - (((X -1)^2 / -10) + .9)))
    return(trupry1)}
  
  if (r==82) {
    trupry1 = (((X -1)^2 / 10) + .3) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X -1)^2 / 2.5) + .5) - (((X -1)^2 / 10) + .3)))
    return(trupry1)}
  
  if(r==86) {
    trupry1 = (((X + 0)^2 / 2) + 0.4) + ((((Z + .125)^2 / 1.25) - .0125) *((((X + 0)^2 / 5) + 0.1) - (((X + 0)^2 / 2) + 0.4)))
    return(trupry1)}
  
  if(r==87) {
    trupry1 = ((1-Z)*((((X + 0.2499994)^2 / 2.9999976) + 0.27916675))) + ((Z)*(((X + 0)^2 / 5 + 0.1)))
    return(trupry1)}
  
  if(r==88) {
    trupry1 = (((X - 1.25)^2 / -5) + 0.9125) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X - 1.25)^2 / -2.5) + 0.825) - (((X - 1.25)^2 / -5) + 0.9125)))
    return(trupry1)}
  
  if(r==80) {
    trupry1 = ((1-Z)*((((X -1.249785775)^2 / 2.142245073) + 0.170874979))) + ((Z)*(((X -1.246268657)^2 / 3.731343284 + 0.083746269)))
    return(trupry1)}
  
  if(r==100) {
    trupry1 = (((X -1)^2 / -2) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125) *((((X -1.248502994)^2 / -14.97005988) + 0.90412515) - (((X -1)^2 / -2) + 0.8)))
    return(trupry1)}
  
  if(r==101) {
    trupry1 = (((X + 0)^2 / 10) + 0.2) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X + 0.25030012)^2 / 3.00120048) + 0.37912497) - (((X + 0)^2 / 10) + 0.2)))
    return(trupry1)}
  
  if(r==96) {
    trupry1 = (((X + 2.77556E-16)^2 / -5) + 0.9) + ((((Z + .75)^2 / 2.5) - .225) *((((X + 0)^2 / -1.666666667) + 0.8) - (((X + 2.77556E-16)^2 / -5) + 0.9)))
    return(trupry1)}
  
  if(r==97) {
    trupry1 = (((X -1)^2 / 1.428571429) + 0.2) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X - 1.25)^2 / 5) + 0.0875) - (((X -1)^2 / 1.428571429) + 0.2)))
    return(trupry1)}
  
  if(r==111) {
    trupry1 = (((X + 0.25)^2 / -2.5) + 0.725) + ((((Z + .75)^2 / 2.5) - .225) *((((X + 5.55112E-16)^2 / -10) + 0.9) - (((X + 0.25)^2 / -2.5) + 0.725)))
    return(trupry1)}
  
  if(r==112) {
    trupry1 = (((X + 0)^2 / -1.428571429) + 0.8) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X + 0.257575758)^2 / -7.575757576) + 0.908757576) - (((X + 0)^2 / -1.428571429) + 0.8)))
    return(trupry1)}
  
  if(r==89) {
    trupry1 = (((X + 0)^2 / 10) + 0.7) + ((((Z + .125)^2 / 1.25) - .0125)*((((X + 0.249625187)^2 / 3.748125937) + 0.083374963) - (((X + 0)^2 / 10) + 0.7)))
    return(trupry1)}
  
  if(r==83) {
    trupry1 = (((X + 5.55112E-16)^2 / -10) + 0.5) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0)^2 / -2.5) + 0.9) - (((X + 5.55112E-16)^2 / -10) + 0.5)))
    return(trupry1)}
  
  if(r==81) {
    trupry1 = (((X -1.249999985)^2 / 7.49999985) + 0.691666668) + ((((Z + .125)^2 / 1.25) - .0125) *((((X - 1)^2 / 2) + 0.3) - (((X -1.249999985)^2 / 7.49999985) + 0.691666668)))
    return(trupry1)}
  
  if(r==90) {
    trupry1 = (((X -1.253012048)^2 / -3.012048193) + 0.921253012) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X -1.257575758)^2 / -7.575757576) + 0.508757576) - (((X -1.253012048)^2 / -3.012048193) + 0.921253012)))
    return(trupry1)}
  
  if(r==91) {
    trupry1 = (((X - 1.25)^2 / -2.5) + 0.925) + ((((Z + .75)^2 / 2.5) - .225) *((((X - 1.25)^2 / -5) + 0.5125) - (((X - 1.25)^2 / -2.5) + 0.925)))
    return(trupry1)}
  
  if(r==84) {
    trupry1 = (((X -1.247863248)^2 / 2.136752137) + 0.071247863) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.246268657)^2 / 3.731343284) + 0.483746269) - (((X -1.247863248)^2 / 2.136752137) + 0.071247863)))
    return(trupry1)}
  
  if(r==98) {
    trupry1 = (((X - 1)^2 / 10) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125)*((((X -1.25030012)^2 / 3.00120048) + 0.07912497) - (((X - 1)^2 / 10) + 0.8)))
    return(trupry1)}
  
  if(r==99) {
    trupry1 = ((1-Z)*((((X - 1)^2 / -2) + 0.8))) + ((Z)*(((X - 1)^2 / -10 + 0.3)))
    return(trupry1)}
  
  if(r==102) {
    trupry1 = ((1-Z)*((((X + 0)^2 / 1.666666667) + 0.1))) + ((Z)*(((X + 0.250750751)^2 / 7.507507508 + 0.691624925)))
    return(trupry1)}
  
  if(r==103) {
    trupry1 = (((X + 0.249785775)^2 / 2.142245073) + 0.070874979) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X + 0.25)^2 / 5) + 0.5875) - (((X + 0.249785775)^2 / 2.142245073) + 0.070874979)))
    return(trupry1)}
  
  if(r==113) {
    trupry1 = ((1-Z)*((((X -5.55112E-16)^2 / -10) + 0.3))) + ((Z)*(((X + 0)^2 / -1.666666667 + 0.9)))
    return(trupry1)}
  
  if(r==108) {
    trupry1 = (((X + 0)^2 / -1.428571429) + 0.9) + ((((Z + .75)^2 / 2.5) - .225)*((((X + 0.257575758)^2 / -7.575757576) + 0.308757576) - (((X + 0)^2 / -1.428571429) + 0.9)))
    return(trupry1)}
  
  if(r==77) {
    trupry1 = 0.2+(0.1*X)+(0.2*Z)+(0.3*X*Z)
    return(trupry1)}
  
  if(r==74) {
    trupry1 = 0.2+(-0.1*X)+(0.4*Z)+(-0.3*X*Z)
    return(trupry1)}
  
  if(r==75) {
    trupry1 = 0.2+(0.2*X)+(0.1*Z)+(0.3*X*Z)
    return(trupry1)}
  
  if(r==76) {
    trupry1 = 0.6+(0.2*X)+(-0.5*Z)+(0.3*X*Z)
    return(trupry1)}
  
  if(r==78) {
    trupry1 = 0.1+(0.6*X)+(0.5*Z)+(-0.3*X*Z)
    return(trupry1)}
  
  if(r==79) {
    trupry1 = 0.1+(0.7*X)+(0.4*Z)+(-0.3*X*Z)
    return(trupry1)}
  
  if(r==93) {
    trupry1 = 0.4+(-0.1*X)+(0.5*Z)+(-0.4*X*Z)
    return(trupry1)}
  
  if(r==94) {
    trupry1 = 0.7+(-0.5*X)+(0.2*Z)+(0.4*X*Z)
    return(trupry1)}
  
  if(r==95) {
    trupry1 = 0.3+(0.6*X)+(-0.1*Z)+(-0.4*X*Z)
    return(trupry1)}
  
  if(r==92) {
    trupry1 = 0.9+(-0.3*X)+(-0.1*Z)+(-0.4*X*Z)
    return(trupry1)}
  
  if(r==105) {
    trupry1 = 0.8+(-0.6*X)+(0.1*Z)+(0.5*X*Z)
    return(trupry1)}
  
  if(r==104) {
    trupry1 = 0.9+(-0.7*X)+(-0.6*Z)+(0.5*X*Z)
    return(trupry1)}
  
  if(r==48) {
    trupry1 = (((X + 0)^2 / 3.333333333) + 0.3) + ((((Z + .75)^2 / 2.5) - .225)*((((X + 3.88578E-16)^2 / 3.333333333) + 0.6) - (((X + 0)^2 / 3.333333333) + 0.3)))
    return(trupry1)}
  
  if(r==40) {
    trupry1 = (((X - 1)^2 / 3.333333333) + 0.5) + ((((Z + .125)^2 / 1.25) - .0125)*((((X - 1)^2 / 3.333333333) + 0.2) - (((X - 1)^2 / 3.333333333) + 0.5)))
    return(trupry1)}
  
  if(r==41) {
    trupry1 = (((X - 1.25)^2 / 5) + 0.3875) + ((((Z + .125)^2 / 1.25) - .0125)*((((X - 1.25)^2 / 5) + 0.0875) - (((X - 1.25)^2 / 5) + 0.3875)))
    return(trupry1)}
  
  if(r==43) {
    trupry1 = (Z*((((X + 0)^2 / -3.333333333) + 0.4))) + ((1-Z)*(((X + 3.88578E-16)^2 / -3.333333333 + 0.8)))
    return(trupry1)}
  
  if(r==42) {
    trupry1 = (((X + 0.25)^2 / -5) + 0.9125) + ((((Z - 1.125)^2 / -1.25) + 1.0125)*((((X + 0.25)^2 / -5) + 0.5125) - (((X + 0.25)^2 / -5) + 0.9125)))
    return(trupry1)}
  
  if(r==46) {
    trupry1 = (Z*((((X + 3.88578E-16)^2 / 3.333333333) + 0.6))) + ((1-Z)*(((X + 0)^2 / 3.333333333 + 0.1)))
    return(trupry1)}
  
  if(r==49) {
    trupry1 = (((X - 1)^2 / -2.5) + 0.5) + ((((Z - 1.125)^2 / -1.25) + 1.0125)*((((X - 1)^2 / -2.5) + 0.8) - (((X - 1)^2 / -2.5) + 0.5)))
    return(trupry1)}
  
  if(r==47) {
    trupry1 = (((X -1.249625187)^2 / -3.748125937) + 0.916625037) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.249625187)^2 / -3.748125937) + 0.616625037) - (((X -1.249625187)^2 / -3.748125937) + 0.916625037)))
    return(trupry1)}
  
  if(r==44) {
    trupry1 = (((X -1.249625187)^2 / 3.748125937) + 0.083374963) + ((((Z + .75)^2 / 2.5) - .225)*((((X -1.249625187)^2 / 3.748125937) + 0.483374963) - (((X -1.249625187)^2 / 3.748125937) + 0.083374963)))
    return(trupry1)}
  
  if(r==45) {
    trupry1 = (((X + 0.25030012)^2 / -3.00120048) + 0.62087503) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0.25030012)^2 / -3.00120048) + 0.92087503) - (((X + 0.25030012)^2 / -3.00120048) + 0.62087503)))
    return(trupry1)}
  
  if(r==17) {
    trupry1 = 0.4+(-0.3*X)+(0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==18) {
    trupry1 = 0.5+(-0.3*X)+(0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==19) {
    trupry1 = 0.6+(0.3*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==22) {
    trupry1 = 0.2+(0.3*X)+(0.4*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==20) {
    trupry1 = 0.6+(0.3*X)+(-0.4*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==23) {
    trupry1 = 0.1+(0.3*X)+(0.5*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==21) {
    trupry1 = 0.5+(0.4*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==15) {
    trupry1 = 0.9+(-0.4*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==24) {
    trupry1 = 0.1+(0.4*X)+(0.4*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==16) {
    trupry1 = 0.9+(-0.5*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==59) {
    trupry1 = (((X+0)^2 / 3.333333) + 0.3) + ((((Z + .75)^2 / 2.5) - .225) * ((((X + .249962502)^2 / 3.749812509) + 0.4833375) - (((X + 0)^2 / 3.333333) + 0.3)))
    return(trupry1)}
  
  if(r==54) {
    trupry1 = (((X + -1)^2 / 3.33333333) + 0.5) + ((((Z + .125)^2 / 1.25) - .0125) * ((((X + -1)^2 / 5) + 0.2) - (((X + -1)^2 / 3.33333333) + 0.5)))
    return(trupry1)}
  
  if(r==55) {
    trupry1 = (((X + -1.25)^2 / 5) + 0.3875) + ((((Z + 0.125)^2 / 1.25) - 0.0125) * ((((X + -1)^2 / 5) + 0.2) - (((X + -1.25)^2 / 5) + 0.3875)))
    return(trupry1)}
  
  if(r==56) {
    trupry1 = (Z*(((X + 0)^2 / -3.33333333) + 0.4)) + ((1-Z)*(((X + 4.44089e-16)^2 / -2.5) + 0.8))
    return(trupry1)}
  
  if(r==60) {
    trupry1 = (Z*((((X + .25)^2 / 5) + 0.5875))) + ((1-Z)*(((X + 0)^2 / 2.5 + 0.1)))
    return(trupry1)}
  
  if(r==61) {
    trupry1 = (((X - 1)^2 / -2.5) + 0.5) + ((((Z - 1.125)^2 / -1.25) + 1.0125)*((((X - 1)^2 / -3.33333333) + 0.8) - (((X - 1)^2 / -2.5) + 0.5)))
    return(trupry1)}
  
  if(r==58) {
    trupry1 = (((X -1)^2 / -2) + 0.9) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.249625187)^2 / -3.748125937) + 0.616625037) - (((X -1)^2 / -2) + 0.9)))
    return(trupry1)}
  
  if(r==57) {
    trupry1 = (((X + 0.25)^2 / -2.5) + 0.725) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0.25030012)^2 / -3.00120048) + 0.92087503) - (((X + 0.25)^2 / -2.5) + 0.725)))
    return(trupry1)}
  
  if(r==51) {
    trupry1 = 0.5+(-0.3*X)+(0.3*Z)+(.1*X*Z)
    return(trupry1)}
  
  if(r==52){
    trupry1 = 0.6+(0.3*X)+(-0.4*Z)+(0.1*X*Z)
    return(trupry1)}
  
  if(r==53){
    trupry1 = 0.1+(0.3*X)+(0.5*Z)+(-0.1*X*Z)
    return(trupry1)}
  
  if(r==50){
    trupry1 = 0.9+(-0.4*X)+(-0.3*Z)+(0.1*X*Z)
    return(trupry1)}
  
  if(r==66){
    trupry1 = (((X - 1)^2 / 5) + 0.3) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X -1)^2 / 2.5) + .5) - (((X -1)^2 / 5) + .3)))
    return(trupry1)}
  
  if(r==69){
    trupry1 = (((X + 0)^2 / 2.5) + 0.5) + ((((Z + .125)^2 / 1.25) - .0125) *((((X + 0)^2 / 5) + 0.1) - (((X + 0)^2 / 2.5) + 0.5)))
    return(trupry1)}
  
  if(r==70){
    trupry1 = ((1-Z)*((((X + 0.2499994)^2 / 2.9999976) + 0.27916675))) + ((Z)*(((X + 0)^2 / 3.33333333 + 0.1)))
    return(trupry1)}
  
  if(r==71){
    trupry1 = (((X - 1.25)^2 / -5) + 0.9125) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X - 1.25030012)^2 / -3.00120048) + 0.82087503) - (((X - 1.25)^2 / -5) + 0.9125)))
    return(trupry1)}
  
  if(r==72){
    trupry1 = (((X + 0)^2 / 10) + 0.7) + ((((Z + .125)^2 / 1.25) - .0125)*((((X + 0.25)^2 / 5) + 0.1875) - (((X + 0)^2 / 10) + 0.7)))
    return(trupry1)}
  
  if(r==67){
    trupry1 = (((X + 0)^2 / -5) + 0.5) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0)^2 / -2.5) + 0.9) - (((X + 0)^2 / -5) + 0.5)))
    return(trupry1)}
  
  if(r==73){
    trupry1 = (((X -1.253012048)^2 / -3.012048193) + 0.921253012) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X -1.25)^2 / -5) + 0.6125) - (((X -1.253012048)^2 / -3.012048193) + 0.921253012)))
    return(trupry1)}
  
  if(r==68){
    trupry1 = (((X -1.247863248)^2 / 2.136752137) + 0.071247863) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.25030012)^2 / 3.00120048) + 0.37912497) - (((X -1.247863248)^2 / 2.136752137) + 0.071247863)))
    return(trupry1)}
  
  if(r==64){
    trupry1 = 0.2+(0.1*X)+(0.3*Z)+(0.2*X*Z)
    return(trupry1)}
  
  if(r==62){
    trupry1 = 0.2+(-0.1*X)+(0.3*Z)+(-0.2*X*Z)
    return(trupry1)}
  
  if(r==65){
    trupry1 = 0.2+(0.2*X)+(0.1*Z)+(0.2*X*Z)
    return(trupry1)}
  
  if(r==63){
    trupry1 = 0.5+(0.2*X)+(-0.4*Z)+(0.2*X*Z)
    return(trupry1)}
  
  if(r==115){
    trupry1 = (((X -1)^2 / -1.66666667) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125) *((((X -1.248502994)^2 / -14.97005988) + 0.90412515) - (((X -1)^2 / -1.66666667) + 0.8)))
    return(trupry1)}
  
  if(r==109){
    trupry1 = (((X -1)^2 / 1.428571429) + 0.2) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X - 1.250750751)^2 / 7.507507508) + 0.091624925) - (((X -1)^2 / 1.428571429) + 0.2)))
    return(trupry1)}
  
  if(r==110){
    trupry1 = (((X - 1)^2 / 10) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125)*((((X -1.25)^2 / 2.5) + 0.075) - (((X - 1)^2 / 10) + 0.8)))
    return(trupry1)}
  
  if(r==114){
    trupry1 = ((1-Z)*((((X - 1)^2 / -1.66666667) + 0.9))) + ((Z)*(((X - 1)^2 / -10 + 0.3)))
    return(trupry1)}
  
  if(r==106){
    trupry1 = 0.7+(-0.6*X)+(0.2*Z)+(0.5*X*Z)
    return(trupry1)}
  
  if(r==107){
    trupry1 = 0.3+(0.6*X)+(-0.1*Z)+(-0.5*X*Z)
    return(trupry1)}
  
  
  if(r==1){
    trupry1=0.1+0*X+0*Z
    return(trupry1)}
  
  if(r==116){
    trupry1=(.1+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==117){
    trupry1=.1+(Z*.2)+0*X
    return(trupry1)}
  
  if(r==118){
    trupry1=.1+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==13){
    trupry1=.1+(Z*.4)+0*X
    return(trupry1)}
  
  if(r==119){
    trupry1=(.1+.5)-(Z*.5)+0*X
    return(trupry1)}
  
  if(r==2){
    trupry1=0.2+0*X+0*Z
    return(trupry1)}
  
  if(r==120){
    trupry1=(.2+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==121){
    trupry1=(.2+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==14){
    trupry1=.2+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==122){
    trupry1=.2+(Z*.4)+0*X
    return(trupry1)}
  
  if(r==123){
    trupry1=(.2+.5)-(Z*.5)+0*X
    return(trupry1)}
  
  if(r==3){
    trupry1=0.3+0*X+0*Z
    return(trupry1)}
  
  if(r==124){
    trupry1=.3+(Z*.1)+0*X
    return(trupry1)}
  
  if(r==125){
    trupry1=.3+(Z*.2)+0*X
    return(trupry1)}
  
  if(r==126){
    trupry1=.3+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==10){
    trupry1=(.3+.4)-(Z*.4)+0*X
    return(trupry1)}
  
  if(r==127){
    trupry1=(.3+.5)-(Z*.5)+0*X
    return(trupry1)}
  
  if(r==4){
    trupry1=0.4+0*X+0*Z
    return(trupry1)}
  
  if(r==128){
    trupry1=(.4+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==129){
    trupry1=.4+(Z*.2)+0*X
    return(trupry1)}
  
  if(r==130){
    trupry1=.4+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==11){
    trupry1=(.4+.4)-(Z*.4)+0*X
    return(trupry1)}
  
  if(r==131){
    trupry1=.4+(Z*.5)+0*X
    return(trupry1)}
  
  if(r==5){
    trupry1=0.5+0*X+0*Z
    return(trupry1)}
  
  if(r==12){
    trupry1=(.5+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==132){
    trupry1=(.5+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==133){
    trupry1=(.5+.3)-(Z*.3)+0*X
    return(trupry1)}
  
  if(r==134){
    trupry1=.5+(Z*.4)+0*X
    return(trupry1)}
  
  if(r==6){
    trupry1=0.6+0*X+0*Z
    return(trupry1)}
  
  if(r==135){
    trupry1=.6+(Z*.1)+0*X
    return(trupry1)}
  
  if(r==136){
    trupry1=(.6+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==137){
    trupry1=.6+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==7){
    trupry1=0.7+0*X+0*Z
    return(trupry1)}
  
  if(r==138){
    trupry1=.7+(Z*.1)+0*X
    return(trupry1)}
  
  if(r==139){
    trupry1=(.7+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==8){
    trupry1=0.8+0*X+0*Z
    return(trupry1)}
  
  if(r==140){
    trupry1=(.8+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==9){
    trupry1=0.9+0*X+0*Z
    return(trupry1)}
  
  if(r==25){
    trupry1=(.1+.1)-(((((Z-1.125)^2)/-1.25)+1.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==141){
    trupry1=.1+(((((Z+.125)^2)/1.25)-.0125)*.2)+0*X
    return(trupry1)}
  
  if(r==33){
    trupry1=.1+(((((Z+.125)^2)/1.25)-.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==142){
    trupry1=.1+(((((Z+.75)^2)/2.5)-.225)*.4)+0*X
    return(trupry1)}
  
  if(r==143){
    trupry1=(.1+.5)-(((((Z-1.75)^2)/-2.5)+1.225)*.5)+0*X
    return(trupry1)}
  
  if(r==26){
    trupry1=(.2+.1)-(((((Z-1.75)^2)/-2.5)+1.225)*.1)+0*X
    return(trupry1)}
  
  if(r==27){
    trupry1=(.2+.2)-(((((Z-1.75)^2)/-2.5)+1.225)*.2)+0*X
    return(trupry1)}
  
  if(r==144){
    trupry1=.2+(((((Z+.75)^2)/2.5)-.225)*.3)+0*X
    return(trupry1)}
  
  if(r==145){
    trupry1=.2+(((((Z+.125)^2)/1.25)-.0125)*.4)+0*X
    return(trupry1)}
  
  if(r==28){
    trupry1=(.2+.5)-(((((Z-1.75)^2)/-2.5)+1.225)*.5)+0*X
    return(trupry1)}
  
  if(r==146){
    trupry1=.3+(((((Z+.125)^2)/1.25)-.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==147){
    trupry1=.3+(((((Z+.125)^2)/1.25)-.0125)*.2)+0*X
    return(trupry1)}
  
  if(r==34){
    trupry1=.3+(((((Z+.125)^2)/1.25)-.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==148){
    trupry1=(.3+.4)-(((((Z-1.125)^2)/-1.25)+1.0125)*.4)+0*X
    return(trupry1)}
  
  if(r==29){
    trupry1=(.3+.5)-(((((Z+.75)^2)/2.5)-.225)*.5)+0*X
    return(trupry1)}
  
  if(r==30){
    trupry1=(.4+.1)-(((((Z-1.125)^2)/-1.25)+1.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==35){
    trupry1=.4+(((((Z+.75)^2)/2.5)-.225)*.2)+0*X
    return(trupry1)}
  
  if(r==149){
    trupry1=.4+(((((Z-1.125)^2)/-1.25)+1.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==150){
    trupry1=(.4+.4)-(((((Z+.75)^2)/2.5)-.225)*.4)+0*X
    return(trupry1)}
  
  if(r==36){
    trupry1=.4+(((((Z-1.75)^2)/-2.5)+1.225)*.5)+0*X
    return(trupry1)}
  
  if(r==151){
    trupry1=(.5+.1)-(((((Z+.75)^2)/2.5)-.225)*.1)+0*X
    return(trupry1)}
  
  if(r==31){
    trupry1=(.5+.2)-(((((Z+.75)^2)/2.5)-.225)*.2)+0*X
    return(trupry1)}
  
  if(r==152){
    trupry1=(.5+.3)-(((((Z+.75)^2)/2.5)-.225)*.3)+0*X
    return(trupry1)}
  
  if(r==37){
    trupry1=.5+(((((Z-1.125)^2)/-1.25)+1.0125)*.4)+0*X
    return(trupry1)}
  
  if(r==153){
    trupry1=.6+(((((Z-1.125)^2)/-1.25)+1.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==154){
    trupry1=(.6+.2)-(((((Z+.125)^2)/1.25)-.0125)*.2)+0*X
    return(trupry1)}
  
  if(r==38){
    trupry1=.6+(((((Z-1.125)^2)/-1.25)+1.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==39){
    trupry1=.7+(((((Z-1.75)^2)/-2.5)+1.225)*.1)+0*X
    return(trupry1)}
  
  if(r==32){
    trupry1=(.7+.2)-(((((Z+.75)^2)/2.5)-.225)*.2)+0*X
    return(trupry1)}
  
  if(r==155){
    trupry1=(.8+.1)-(((((Z+.125)^2)/1.25)-.0125)*.1)+0*X
    return(trupry1)}
  
}


######################
# First Difference Calculation Functions
######################

fd.true<-function(dgp){
  
  x1.1<-c(rep(1, 2), rep(.75, 2), seq(from=0,to=1,by=0.05))
  x2.1<-c(0,1,0,1, rep(1, 21))
  
  x1.2<-c(rep(0, 2), rep(.25, 2), seq(from=0,to=1,by=0.05))
  x2.2<-c(0,1,0,1, rep(0, 21))
  
  return(trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))
  
}


fd<-function(beta, form, link.name){
  
  x1.1<-c(rep(1, 2), rep(.75, 2), seq(from=0,to=1,by=0.05))
  x2.1<-c(0,1,0,1, rep(1, 21))
  
  x1.2<-c(rep(0, 2), rep(.25, 2), seq(from=0,to=1,by=0.05))
  x2.2<-c(0,1,0,1, rep(0, 21))
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
  
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
  
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
  
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  return(as.vector(pry1-pry2))
  
}



central.fd<-function(beta, form, link.name){
  
  # This is the same as the first difference function above,
  # but focusing only on the calculation of first differences in 
  # probability at central locations in the data
  
  x1.1<-c(1,0.75,0.5)
  x2.1<-c(0.5,0.5,1)
  
  x1.2<-c(0,0.25,0.5)
  x2.2<-c(0.5,0.5,0)
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
    
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
    
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
    
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  return(as.vector(pry1-pry2))
  
}

central.fd.true<-function(dgp){
  
  # This is the same as the fd.true function above,
  # but focusing only on the calculation of first differences in 
  # probability at central locations in the data
  
  x1.1<-c(1,0.75,0.5)
  x2.1<-c(0.5,0.5,1)
  
  x1.2<-c(0,0.25,0.5)
  x2.2<-c(0.5,0.5,0)
  
  return(trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))
  
}



#####################
# Marginal Effect Calculation Function
#####################

me<-function(X, beta, dir, form, link.name){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
      
      if(link.name=="regression"){
        link.fun<-gaussian(link = "identity")$linkinv
      }
      
      if(link.name=="loglog"){
        link.fun<-function(theta){exp(-exp(-theta))}
      }
      
      if(link.name=="logc"){
        link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
      }
      
    }else{
      link.fun<-binomial(link = link.name)$linkinv
    }
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
  if(dir=="x2"){
    x1.1<-x1
    x1.2<-x1
    x2.1<-1
    x2.2<-0
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
      
      if(link.name=="regression"){
        link.fun<-gaussian(link = "identity")$linkinv
      }
      
      if(link.name=="loglog"){
        link.fun<-function(theta){exp(-exp(-theta))}
      }
      
      if(link.name=="logc"){
        link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
      }
      
    }else{
      link.fun<-binomial(link = link.name)$linkinv
    }
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    if(dir=="x1"){return(as.vector((pry1-pry2)/0.02))}
    if(dir=="x2"){return(as.vector(pry1-pry2))}
  }
  
}

clusterExport(cl, list="me")

me.shell.fun<-function(beta, form, link.name){
  return(c(me(cbind(0.5,0.5), beta, "x1", form, link.name), me(cbind(0.5,0.5), beta, "x2", form, link.name)))
}

me.dif<-function(beta, form, link.name){
  
  x1.1<-c(.5)
  x2.1<-c(1)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5)
  x2.2<-c(0)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me(X.1, beta=beta, dir="x1", form, link.name)
  me.2.1<-me(X.2, beta=beta, dir="x1", form, link.name)
  
  x1.1<-c(1,.75)
  x2.1<-c(.5,.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0,.25)
  x2.2<-c(.5,.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me(X.1, beta=beta, dir="x2", form, link.name)
  me.2.2<-me(X.2, beta=beta, dir="x2", form, link.name)
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}


me.true<-function(X, r, dir){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    return(as.vector((trupry(r,x1.1,x2.1)-trupry(r,x1.2,x2.2))/0.02))
    
  }
  
  if(dir=="x2"){
    
    x1.1<-x1
    x1.2<-x1
    x2.1<-1
    x2.2<-0
    
    return(as.vector((trupry(r,x1.1,x2.1)-trupry(r,x1.2,x2.2))))
    
  }
  
}


me.dif.true<-function(r){
  
  x1.1<-c(.5)
  x2.1<-c(1)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5)
  x2.2<-c(0)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me.true(X.1, r, dir="x1")
  me.2.1<-me.true(X.2, r, dir="x1")
  
  x1.1<-c(1,.75)
  x2.1<-c(.5,.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0,.25)
  x2.2<-c(.5,.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me.true(X.1, r, dir="x2")
  me.2.2<-me.true(X.2, r, dir="x2")
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}

second.dif<-function(beta, form, link.name){
  
  x1.1<-c(1,0,1,0)
  x2.1<-c(1,1,0,0)
  
  x1.2<-c(.75,.25,.75,.25)
  x2.2<-c(1,1,0,0)
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
    
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
    
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
    
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  
  return(c((pry1[1]-pry1[2])-(pry1[3]-pry1[4]), (pry2[1]-pry2[2])-(pry2[3]-pry2[4])))
  
}



second.dif.true<-function(r){
  
  x1.1<-c(1,0,1,0)
  x2.1<-c(1,1,0,0)
  
  x1.2<-c(.75,.25,.75,.25)
  x2.2<-c(1,1,0,0)
  
  pry1<-trupry(r,x1.1,x2.1)
  pry2<-trupry(r,x1.2,x2.2)
  
  return(c((pry1[1]-pry1[2])-(pry1[3]-pry1[4]), (pry2[1]-pry2[2])-(pry2[3]-pry2[4])))
  
}



#####################
# Estimate models on data sets
#####################

# Prepare filenames
filenames<-paste("sim0", 1:9, "D.dta", sep = "")
filenames<-c(filenames,paste("sim", 10:155, "D.dta", sep = ""))

for(r in 1:155){ 
  
  read.dta(filenames[r])->simdatmain
  
  fileConn<-file("A-current-iteration.txt")
  writeLines(c("Current Data Set:", r), fileConn)
  close(fileConn)
  
  # Progress indicator
  print(c("DATA SET: ",r),quote=F)
  pb<-txtProgressBar(min=0, max=reps, initial=0, style=3)
  
  for(i in 1:reps){
    
    setTxtProgressBar(pb, value=i)
    
    model.name.vector<-c()
    model.formula.vector<-c()
    model.link.vector<-c()
    
    # Subset datasets
    simdat<-simdatmain[(1+1000*(i-1)):(1000*i),]
    write.dta(simdat, file="simdat_temp.dta")
    
    # run a bunch of models!
    
    ######
    # logit model, with NO product term
    ######
    model.name<-"logit"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.1<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "logit")
    
    ######
    # logit model, with product term
    ######
    model.name<-"logit_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.2<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    ######
    # cloglog, with NO product term
    ######
    model.name<-"cloglog"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    
    # save the list of commands to an object
    model.3<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    ######
    # cloglog, with product term
    ######
    model.name<-"cloglog_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.4<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    
    
    ######
    # log, with NO product term
    ######
    model.name<-"log"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.5<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "log")
    
    ######
    # log, with product term
    ######
    model.name<-"log_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.6<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    
    ######
    # probit, with NO product term
    ######
    model.name<-"probit"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line3<-"probit y x1 x2, iter(100)"
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.7<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "probit")
    
    ######
    # probit, with product term
    ######
    model.name<-"probit_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line3<-"probit y x1 x2 x1x2, iter(100)"
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.8<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    
    ######
    # regression, with NO product term
    ######
    model.name<-"regression"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    # save the list of commands to an object
    model.9<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "regression")
    
    ######
    # regression, with product term
    ######
    model.name<-"regression_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    # save the list of commands to an object
    model.10<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    
    
    
    ######
    # loglog model, with NO product term
    ######
    model.name<-"loglog"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.11<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "loglog")
    
    ######
    # loglog, with product term
    ######
    model.name<-"loglog_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.12<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "loglog")
    
    
    ######
    # logc model, with NO product term
    ######
    model.name<-"logc"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.13<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "logc")
    
    ######
    # logc, with product term
    ######
    model.name<-"logc_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.14<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "logc")
    
    ######
    # END OF THE SIMPLE MODELS: RECORD THE LENGTH!
    ###### 
    SM<-length(model.name.vector)
    
    
    
    
    
    
    
    
    
    
    
    ######
    # logit model, x1sqx2
    ######
    model.name<-"logit_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2 x1sq x1sqx2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.15<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    
    
    ######
    # cloglog, x1sqx2
    ######
    model.name<-"cloglog_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    
    # save the list of commands to an object
    model.16<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    ######
    # log, x1sqx2
    ######
    model.name<-"log_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.17<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    ######
    # probit, x1sqx2
    ######
    model.name<-"probit_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"probit y x1 x2 x1x2 x1sq x1sqx2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.18<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    ######
    # regression, x1sqx2
    ######
    model.name<-"regression_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.19<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    
    
    ######
    # loglog, x1sqx2
    ######
    model.name<-"loglog_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(loglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.20<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "loglog")
    
    
    
    
    
    ######
    # logc, x1sqx2
    ######
    model.name<-"logc_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.21<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "logc")
    
    
    
    
    
    
    
    
    
    # before executing models, save the list of original (non-selection-criteria) models
    model.name.vector.o<-model.name.vector
    
    # write all the Stata commands to a do file
    fileConn<-file("models.do")
    writeLines(as.vector(vapply(paste("model.",1:length(model.name.vector), sep=""), FUN=get, FUN.VALUE=rep("",22))),fileConn)
    close(fileConn)
    
    # execute that do file
    system(paste("\"C:\\Program Files (x86)\\Stata11\\StataSE-64.exe\" /e do \"",normalizePath(getwd()), "\"\\models.do\"", sep=""))
    
    # recover the beta, vcv, information criteria, convergence status, and hosmer-lemeshow/ROC fit statistics for that model
    for(j in 1:length(model.name.vector)){
      
      temp<-as.matrix(read.table(file=paste("beta_", model.name.vector[j],".txt", sep="")))
      temp<-as.matrix(temp[c(dim(temp)[1],1:(dim(temp)[1]-1)),])
      assign(paste(model.name.vector[j],".beta", sep=""), value=temp)
      
      temp<-as.matrix(read.table(file=paste("vcv_", model.name.vector[j],".txt", sep="")))
      temp<-temp[c(dim(temp)[1],1:(dim(temp)[1]-1)),]
      temp<-temp[,c(dim(temp)[1],1:(dim(temp)[1]-1))]     
      assign(paste(model.name.vector[j],".vcv", sep=""), value=temp)
      
      assign(paste(model.name.vector[j],".ic", sep=""), value=as.matrix(read.table(file=paste("ic_", model.name.vector[j],".txt", sep=""))))
      
      assign(paste(model.name.vector[j],".converge", sep=""), value=as.matrix(read.table(file=paste("converge_", model.name.vector[j],".txt", sep=""))))
      
      assign(paste(model.name.vector[j],".hl", sep=""), value=as.matrix(read.table(file=paste("hl_", model.name.vector[j],".txt", sep=""))))
      assign(paste(model.name.vector[j],".roc", sep=""), value=as.matrix(read.table(file=paste("roc_", model.name.vector[j],".txt", sep=""))))
      
    }
    
    
    
    ###
    # choose the optimal estimators via IC/HL statistics
    ###

    # store the model vectors in a temporary location for augmentation
    model.name.vector.t<-model.name.vector     
    model.formula.vector.t<-model.formula.vector
    model.link.vector.t<-model.link.vector
 
  
    
    # all models, ROC selector
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".roc", sep="")))
      }else{ic.vector<-c(ic.vector, -Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==max(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==max(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==max(ic.vector))])
    
    
    # all models, HL selector
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        cand<-get(paste(model.name.vector[j], ".hl", sep=""))
        if(suppressWarnings(is.na(as.numeric(cand))==FALSE)){
          ic.vector<-c(ic.vector, cand)
        }else{ic.vector<-c(ic.vector, Inf)}
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    
    # all models, bic selector 
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[6])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
	ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    # all models, aic selector
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[5])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
	ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
 
    
    

    # simple models, ROC selector
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".roc", sep="")))
      }else{ic.vector<-c(ic.vector, -Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==max(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==max(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==max(ic.vector))])
    
    
    # simple models, HL selector
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        cand<-get(paste(model.name.vector[j], ".hl", sep=""))
        if(suppressWarnings(is.na(as.numeric(cand))==FALSE)){
          ic.vector<-c(ic.vector, cand)
        }else{ic.vector<-c(ic.vector, Inf)}
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    
    # simple models, bic selector 
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[6])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    # simple models, aic selector
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[5])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    

    if(probit.converge==1 & probit_product.converge==1){
      
      # probit model, roc selector
      if(probit.roc>=probit_product.roc){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, bic selector
      if(probit.ic[6]<=probit_product.ic[6]){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, aic selector
      if(probit.ic[5]<=probit_product.ic[5]){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, hl selector
      if(is.na(probit.hl)==T){probit.hl <- 1e10 + runif(1)}
      if(is.na(probit_product.hl)==T){probit_product.hl <- 1e10 + runif(1)}
      
      if(probit.hl<=probit_product.hl){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, significance selector
      sig<-1.96<=abs(probit_product.beta/sqrt(diag(probit_product.vcv)))[4,1]
      if(sig==F){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }    

    }
    
    if(probit.converge==1 & probit_product.converge==0){
     
      K<-which(model.name.vector=="probit")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(probit.converge==0 & probit_product.converge==1){
      
      K<-which(model.name.vector=="probit_product")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(probit.converge==0 & probit_product.converge==0){
      
      model.name.vector.t<-c(model.name.vector.t, rep(NA, 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(NA, 5))
      model.link.vector.t<-c(model.link.vector.t, rep(NA, 5))             
      
    }
    
    

    if(logit.converge==1 & logit_product.converge==1){
      
      # logit model, roc selector
      if(logit.roc>=logit_product.roc){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      
      # logit model, bic selector
      if(logit.ic[6]<=logit_product.ic[6]){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
  
      # logit model, aic selector 
      if(logit.ic[5]<=logit_product.ic[5]){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
  
  
      # logit model, hl selector
      if(is.na(logit.hl)==T){logit.hl <- 1e10 + runif(1)}
      if(is.na(logit_product.hl)==T){logit_product.hl <- 1e10 + runif(1)}
      
       if(logit.hl<=logit_product.hl){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
  
          
      # logit model, significance selector 
      sig<-1.96<=abs(logit_product.beta/sqrt(diag(logit_product.vcv)))[4,1]
      if(sig==F){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }    
    
    }
    
    if(logit.converge==1 & logit_product.converge==0){
      
      K<-which(model.name.vector=="logit")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(logit.converge==0 & logit_product.converge==1){
      
      K<-which(model.name.vector=="logit_product")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(logit.converge==0 & logit_product.converge==0){
      
      model.name.vector.t<-c(model.name.vector.t, rep(NA, 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(NA, 5))
      model.link.vector.t<-c(model.link.vector.t, rep(NA, 5))       
      
    }
      
    
    
    
    # if this is the first iteration, create a place to store which model was selected
    if(i==1){selected.models<-matrix(data=NA, nrow=reps, ncol=length(model.name.vector.t)-length(model.name.vector))}
    selected.models[i,]<-model.name.vector.t[(length(model.name.vector)+1):length(model.name.vector.t)]
    
    
    
    # draw some observations from the asymptotic multivariate normal distribution of betas for each (non-selection-criterion) model
    for(j in 1:length(model.name.vector)){ 
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        VCV<-get(paste(model.name.vector[j],".vcv", sep=""))
        assign(paste(model.name.vector[j],".beta.draws", sep=""), value=rmvnorm(1000, mean=get(paste(model.name.vector[j],".beta", sep="")), sigma=VCV))
      }else{
        assign(paste(model.name.vector[j],".beta.draws", sep=""), value=matrix(data=NA, nrow=1000, ncol=length(get(paste(model.name.vector[j],".beta", sep="")))))
      }
    }
    
    
    
    # create result storage matrices
    if(i==1){
      
      # model convergence
      convergence.mat<-matrix(data=NA, ncol=length(model.name.vector), nrow=reps)
      
      # 84 first differences spread over the space
      fd.list<-list()
      fd.ci.list<-list()
      fd.ci.width.list<-list()
      
      # 4 central FDs, one for each model
      central.fd.list<-list()
      central.fd.ci.list<-list()
      central.fd.ci.width.list<-list()
      
      # 2 ME calculations for each model
      me.list<-list()
      me.ci.list<-list()
      me.ci.width.list<-list()
      
      # 4 ME difference calculations for each model
      me.dif.list<-list()
      me.dif.ci.list<-list()
      me.dif.ci.width.list<-list()
      
      # 2 second difference calculations for each model
      second.dif.list<-list()
      second.dif.ci.list<-list()
      second.dif.ci.width.list<-list()
      
      for(j in 1:length(model.name.vector.t)){
        
        fd.list[[j]]<-matrix(data=NA, nrow=reps, ncol=25, dimnames=NULL)  
        fd.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=25, dimnames=NULL)
        fd.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=25, dimnames=NULL)
        central.fd.list[[j]]<-matrix(data=NA, nrow=reps, ncol=3, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0")))
        central.fd.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=3, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0")))
        central.fd.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=3, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0")))        
        me.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_0.5_0.5", "x2_1_0")))
        me.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_0.5_0.5", "x2_1_0")))
        me.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_0.5_0.5", "x2_1_0")))
        me.dif.list[[j]]<-matrix(data=NA, nrow=reps, ncol=3, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0")))
        me.dif.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=3, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0")))
        me.dif.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=3, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0")))
        second.dif.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25")))
        second.dif.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25")))
        second.dif.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25")))
        
      }
      colnames(convergence.mat)<-model.name.vector
      names(fd.list)<-model.name.vector.t
      names(fd.ci.list)<-model.name.vector.t
      names(fd.ci.width.list)<-model.name.vector.t
      names(central.fd.list)<-model.name.vector.t
      names(central.fd.ci.list)<-model.name.vector.t
      names(central.fd.ci.width.list)<-model.name.vector.t
      names(me.dif.list)<-model.name.vector.t
      names(me.dif.ci.list)<-model.name.vector.t
      names(me.dif.ci.width.list)<-model.name.vector.t
      names(second.dif.list)<-model.name.vector.t
      names(second.dif.ci.list)<-model.name.vector.t
      names(second.dif.ci.width.list)<-model.name.vector.t
    }
    
    # calculate performance statistics, but only for the non-conditional models
    for(j in 1:length(model.name.vector)){  
      
      convergence.mat[i,j]<-get(paste(model.name.vector[j], ".converge", sep=""))

      if(1!=get(paste(model.name.vector[j], ".converge", sep=""))){
        fd.list[[j]][i,]<-rep(NA, dim(fd.list[[j]])[2])
        central.fd.list[[j]][i,]<-rep(NA, dim(central.fd.list[[j]])[2])
        me.list[[j]][i,]<-rep(NA, dim(me.list[[j]])[2])
        me.dif.list[[j]][i,]<-rep(NA, dim(me.dif.list[[j]])[2])
        second.dif.list[[j]][i,]<-rep(NA, dim(second.dif.list[[j]])[2])
        fd.ci.list[[j]][i,]<-rep(NA, dim(fd.ci.list[[j]])[2])
        central.fd.ci.list[[j]][i,]<-rep(NA, dim(central.fd.ci.list[[j]])[2])
        me.ci.list[[j]][i,]<-rep(NA, dim(me.ci.list[[j]])[2])
        me.dif.ci.list[[j]][i,]<-rep(NA, dim(me.dif.ci.list[[j]])[2])
        second.dif.ci.list[[j]][i,]<-rep(NA, dim(second.dif.ci.list[[j]])[2])
        fd.ci.width.list[[j]][i,]<-rep(NA, dim(fd.ci.width.list[[j]])[2])
        central.fd.ci.width.list[[j]][i,]<-rep(NA, dim(central.fd.ci.width.list[[j]])[2])
        me.ci.width.list[[j]][i,]<-rep(NA, dim(me.ci.width.list[[j]])[2])
        me.dif.ci.width.list[[j]][i,]<-rep(NA, dim(me.dif.ci.width.list[[j]])[2])
        second.dif.ci.width.list[[j]][i,]<-rep(NA, dim(second.dif.ci.width.list[[j]])[2])
        
        
      }else{
      
        # global first differences
        fd.list[[j]][i,]<-fd(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        # central first differences
        central.fd.list[[j]][i,]<-central.fd(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        # central marginal effects
        me.list[[j]][i,]<-c(me(cbind(0.5,0.5), dir="x1", beta=get(paste(model.name.vector[j],".beta", sep="")), form=model.formula.vector[[j]], link.name=model.link.vector[j]), me(cbind(0.5,0.5), dir="x2", beta=get(paste(model.name.vector[j],".beta", sep="")), form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        
        # marginal effect differences
        me.dif.list[[j]][i,]<-me.dif(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        # second differences
        second.dif.list[[j]][i,]<-second.dif(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        
        ####
        # calculate 95% CI of FD and ME and ME Difs
        ####
        
        # storage for simulated first differences
        fd.draws<-matrix(data=NA, nrow=1000, ncol=25)
        central.fd.draws<-matrix(data=NA, nrow=1000, ncol=3)
        me.draws<-matrix(data=NA, nrow=1000, ncol=2)
        me.dif.draws<-matrix(data=NA, nrow=1000, ncol=3)
        second.dif.draws<-matrix(data=NA, nrow=1000, ncol=2)
        
        
        # get the simulated beta
        b.t<-get(paste(model.name.vector[j],".beta.draws", sep=""))
        
        # draw a bunch of stuff using parallel computing
        fd.draws<-t(parApply(cl, X=b.t, FUN=fd, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        central.fd.draws<-t(parApply(cl, X=b.t, FUN=central.fd, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        me.draws<-t(parApply(cl, X=b.t, FUN=me.shell.fun, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        me.dif.draws<-t(parApply(cl, X=b.t, FUN=me.dif, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        second.dif.draws<-t(parApply(cl, X=b.t, FUN=second.dif, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        
        # get the true first difference/MEs
        fd.observed<-fd.true(r)
        central.fd.observed<-central.fd.true(r)
        me.observed<-c(me.true(cbind(0.5,0.5), r, "x1"), me.true(cbind(0.5,0.5), r, "x2"))
        me.dif.observed<-me.dif.true(r)
        second.dif.observed<-second.dif.true(r)
        
        # is the true FD/ME inside the 95% CI for the simulated values?
        fd.ci<-c()
        central.fd.ci<-c()
        me.ci<-c()
        me.dif.ci<-c()
        second.dif.ci<-c()
        fd.ci.width<-c()
        central.fd.ci.width<-c()
        me.ci.width<-c()
        me.dif.ci.width<-c()
        second.dif.ci.width<-c()
        
        for(k in 1:25){
          fd.ci[k]<-(ecdf(fd.draws[,k])(fd.observed[k])>=0.025 & ecdf(fd.draws[,k])(fd.observed[k])<=0.975)
          fd.ci.width[k]<-quantile(fd.draws[,k], probs=0.975)-quantile(fd.draws[,k], probs=0.025)
        }
        for(k in 1:3){
          central.fd.ci[k]<-(ecdf(central.fd.draws[,k])(central.fd.observed[k])>=0.025 & ecdf(central.fd.draws[,k])(central.fd.observed[k])<=0.975)
          central.fd.ci.width[k]<-quantile(central.fd.draws[,k], probs=0.975)-quantile(central.fd.draws[,k], probs=0.025)
        }
        for(k in 1:2){
          me.ci[k]<-(ecdf(me.draws[,k])(me.observed[k])>=0.025 & ecdf(me.draws[,k])(me.observed[k])<=0.975)
          me.ci.width[k]<-quantile(me.draws[,k], probs=0.975)-quantile(me.draws[,k], probs=0.025)
        }
        for(k in 1:3){
          me.dif.ci[k]<-(ecdf(me.dif.draws[,k])(me.dif.observed[k])>=0.025 & ecdf(me.dif.draws[,k])(me.dif.observed[k])<=0.975)
          me.dif.ci.width[k]<-quantile(me.dif.draws[,k], probs=0.975)-quantile(me.dif.draws[,k], probs=0.025)
        }
        for(k in 1:2){
          second.dif.ci[k]<-(ecdf(second.dif.draws[,k])(second.dif.observed[k])>=0.025 & ecdf(second.dif.draws[,k])(second.dif.observed[k])<=0.975)
          second.dif.ci.width[k]<-quantile(second.dif.draws[,k], probs=0.975)-quantile(second.dif.draws[,k], probs=0.025)
        }
        
        fd.ci.list[[j]][i,]<-fd.ci
        central.fd.ci.list[[j]][i,]<-central.fd.ci
        me.ci.list[[j]][i,]<-me.ci
        me.dif.ci.list[[j]][i,]<-me.dif.ci
        second.dif.ci.list[[j]][i,]<-second.dif.ci
        
        fd.ci.width.list[[j]][i,]<-fd.ci.width
        central.fd.ci.width.list[[j]][i,]<-central.fd.ci.width
        me.ci.width.list[[j]][i,]<-me.ci.width
        me.dif.ci.width.list[[j]][i,]<-me.dif.ci.width
        second.dif.ci.width.list[[j]][i,]<-second.dif.ci.width
        
        
      }
      
    }
    
  # now, fill in the appropriate values for all the selection models
  for(j in (length(model.name.vector)+1):(length(model.name.vector.t))){
    
    
    if(is.na(model.name.vector.t[j])){

        fd.list[[j]][i,]<-rep(NA, dim(fd.list[[j]])[2])
        central.fd.list[[j]][i,]<-rep(NA, dim(central.fd.list[[j]])[2])
        me.list[[j]][i,]<-rep(NA, dim(me.list[[j]])[2])
        me.dif.list[[j]][i,]<-rep(NA, dim(me.dif.list[[j]])[2])
        second.dif.list[[j]][i,]<-rep(NA, dim(second.dif.list[[j]])[2])
        fd.ci.list[[j]][i,]<-rep(NA, dim(fd.ci.list[[j]])[2])
        central.fd.ci.list[[j]][i,]<-rep(NA, dim(central.fd.ci.list[[j]])[2])
        me.ci.list[[j]][i,]<-rep(NA, dim(me.ci.list[[j]])[2])
        me.dif.ci.list[[j]][i,]<-rep(NA, dim(me.dif.ci.list[[j]])[2])
        second.dif.ci.list[[j]][i,]<-rep(NA, dim(second.dif.ci.list[[j]])[2])
        fd.ci.width.list[[j]][i,]<-rep(NA, dim(fd.ci.width.list[[j]])[2])
        central.fd.ci.width.list[[j]][i,]<-rep(NA, dim(central.fd.ci.width.list[[j]])[2])
        me.ci.width.list[[j]][i,]<-rep(NA, dim(me.ci.width.list[[j]])[2])
        me.dif.ci.width.list[[j]][i,]<-rep(NA, dim(me.dif.ci.width.list[[j]])[2])
        second.dif.ci.width.list[[j]][i,]<-rep(NA, dim(second.dif.ci.width.list[[j]])[2])
        
        
      }else{      
      
        k=which(model.name.vector.t[j]==model.name.vector)
        
        fd.list[[j]][i,]<-fd.list[[k]][i,]
        central.fd.list[[j]][i,]<-central.fd.list[[k]][i,]
        me.list[[j]][i,]<-me.list[[k]][i,]   
        me.dif.list[[j]][i,]<-me.dif.list[[k]][i,]
        second.dif.list[[j]][i,]<-second.dif.list[[k]][i,]
        
        fd.ci.list[[j]][i,]<-fd.ci.list[[k]][i,]
        central.fd.ci.list[[j]][i,]<-central.fd.ci.list[[k]][i,]
        me.ci.list[[j]][i,]<-me.ci.list[[k]][i,]
        me.dif.ci.list[[j]][i,]<-me.dif.ci.list[[k]][i,]
        second.dif.ci.list[[j]][i,]<-second.dif.ci.list[[k]][i,]
        
        fd.ci.width.list[[j]][i,]<-fd.ci.width.list[[k]][i,]
        central.fd.ci.width.list[[j]][i,]<-central.fd.ci.width.list[[k]][i,]
        me.ci.width.list[[j]][i,]<-me.ci.width.list[[k]][i,]
        me.dif.ci.width.list[[j]][i,]<-me.dif.ci.width.list[[k]][i,]
        second.dif.ci.width.list[[j]][i,]<-second.dif.ci.width.list[[k]][i,]
        
    
      }
  }

  # now set the link, name, and formula vectors to be their appended version
  model.name.vector<-model.name.vector.t
  model.link.vector<-model.link.vector.t
  model.formula.vector<-model.formula.vector.t
    
  }
  close(pb)
  
  
  ####################
  # store all the measurements for each DGP
  ####################
  
  # reset the model name vector to accurately reflect the automatic selection procedures
  selection.model.names<-c("roc", "hl", "bic", "aic", "roc-simp", "hl-simp", "bic-simp", "aic-simp", "probit_roc", "probit_bic", "probit_aic", "probit_hl", "probit_sig", "logit_roc", "logit_bic",  "logit_aic", "logit_hl", "logit_sig")
  model.name.vector[(length(model.name.vector)-length(selection.model.names)+1):length(model.name.vector)]<-selection.model.names
  
  # if this is the first DGP, create the storage matrices for the overall run
  if(r==1){
    
    # convergence storage
    convergence.report.mat<-matrix(data=NA, nrow=155, ncol=dim(convergence.mat)[2])
    colnames(convergence.report.mat)<-model.name.vector[1:(length(model.name.vector)-length(selection.model.names))]

    # RMSE Storage
    fd.rmse<-array(data=NA, dim=c(155, 25, length(model.formula.vector)), dimnames=list(NULL, NULL, model.name.vector)) 
    central.fd.rmse<-array(data=NA, dim=c(155, 3, length(model.formula.vector)), dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0"), model.name.vector))
    me.rmse<-array(data=NA, dim=c(155, 2, length(model.formula.vector)), dimnames=list(NULL, c("x1_0.5_0.5", "x2_1_0"), model.name.vector))
    me.dif.rmse<-array(data=NA, dim=c(155, 3, length(model.formula.vector)), dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0"), model.name.vector))
    second.dif.rmse<-array(data=NA, dim=c(155, 2, length(model.formula.vector)), dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25"), model.name.vector))
    
    # CI storage
    fd.ci.count<-array(data=NA, dim=c(155, 25, length(model.formula.vector)))
    central.fd.ci.count<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    me.ci.count<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.ci.count<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    second.dif.ci.count<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    # CI width storage
    fd.ci.width.mean<-array(data=NA, dim=c(155, 25, length(model.formula.vector)))
    central.fd.ci.width.mean<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    me.ci.width.mean<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.ci.width.mean<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    second.dif.ci.width.mean<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    
    # bias storage
    fd.bias<-array(data=NA, dim=c(155, 25, length(model.formula.vector)))
    central.fd.bias<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    me.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.bias<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    second.dif.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))

    # absolute bias storage
    fd.abs.bias<-array(data=NA, dim=c(155, 25, length(model.formula.vector)))
    central.fd.abs.bias<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    me.abs.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.abs.bias<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    second.dif.abs.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    # error SD storage
    fd.error.sd<-array(data=NA, dim=c(155, 25, length(model.formula.vector)))
    central.fd.error.sd<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    me.error.sd<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.error.sd<-array(data=NA, dim=c(155, 3, length(model.formula.vector)))
    second.dif.error.sd<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    # automatic selection criteria storage
    selected.models.store<-array(data=NA, dim=c(155,reps,dim(selected.models)[2]), dimnames=list(NULL, NULL, selection.model.names))
    
    # which model, between product and no product, was closer to the second difference?
    second.dif.which.closer<-matrix(data=NA, nrow=155, ncol=7)
    
  }
  
  # store the convergence status of models
  convergence.report.mat[r,]<-colSums(convergence.mat)
  
  
  # store which models were selected for each of the automated criteria
  selected.models.store[r,,]<-selected.models
  
  # RMSEs for all measurements
  sqerror<-function(result, true){
    return((result-true)^2)
  }
  
  true.temp<-matrix(data=fd.true(r), byrow=T, nrow=reps, ncol=length(fd.true(r)))
  temp<-lapply(X=fd.list, FUN=sqerror, true=true.temp)
  fd.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  true.temp<-matrix(data=central.fd.true(r), byrow=T, nrow=reps, ncol=length(central.fd.true(r)))
  temp<-lapply(X=central.fd.list, FUN=sqerror, true=true.temp)
  central.fd.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  me.true.temp.store<-c(me.true(cbind(0.5,0.5), r=r, dir="x1"), me.true(cbind(0.5,0.5), r=r, dir="x2"))
  true.temp<-matrix(data=me.true.temp.store, byrow=T, nrow=reps, ncol=length(me.true.temp.store))
  temp<-lapply(X=me.list, FUN=sqerror, true=true.temp)
  me.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  true.temp<-matrix(data=me.dif.true(r), byrow=T, nrow=reps, ncol=length(me.dif.true(r)))
  temp<-lapply(X=me.dif.list, FUN=sqerror, true=true.temp)
  me.dif.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  true.temp<-matrix(data=second.dif.true(r), byrow=T, nrow=reps, ncol=length(second.dif.true(r)))
  temp<-lapply(X=second.dif.list, FUN=sqerror, true=true.temp)
  second.dif.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  
  # CI storage for all measurements
  
  for(j in 1:length(model.name.vector)){
    fd.ci.count[r,,j]<-apply(X=fd.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    central.fd.ci.count[r,,j]<-apply(X=central.fd.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    me.ci.count[r,,j]<-apply(X=me.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    me.dif.ci.count[r,,j]<-apply(X=me.dif.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    second.dif.ci.count[r,,j]<-apply(X=second.dif.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    
    fd.ci.width.mean[r,,j]<-apply(X=fd.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    central.fd.ci.width.mean[r,,j]<-apply(X=central.fd.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    me.ci.width.mean[r,,j]<-apply(X=me.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    me.dif.ci.width.mean[r,,j]<-apply(X=me.dif.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    second.dif.ci.width.mean[r,,j]<-apply(X=second.dif.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    
  }# note: NA removal means that non-converged models will count as "misses" for CI storage purposes
  
  # Bias and SD storage for all measurements
  bias<-function(measured, true){
    return(na.omit(measured-true))
  }
  
  colSDs<-function(mat){
    return(apply(X=mat, FUN=sd, MARGIN=2, na.rm=T))
  }
  
  true.temp<-matrix(data=fd.true(r), byrow=T, nrow=reps, ncol=length(fd.true(r)))
  temp<-lapply(FUN=bias, X=fd.list, true=true.temp)
  fd.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  fd.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  fd.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=central.fd.true(r), byrow=T, nrow=reps, ncol=length(central.fd.true(r)))
  temp<-lapply(FUN=bias, X=central.fd.list, true=true.temp)
  central.fd.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  central.fd.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  central.fd.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=me.true.temp.store, byrow=T, nrow=reps, ncol=length(me.true.temp.store))
  temp<-lapply(FUN=bias, X=me.list, true=true.temp)
  me.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  me.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  me.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=me.dif.true(r), byrow=T, nrow=reps, ncol=length(me.dif.true(r)))
  temp<-lapply(FUN=bias, X=me.dif.list, true=true.temp)
  me.dif.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  me.dif.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  me.dif.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=second.dif.true(r), byrow=T, nrow=reps, ncol=length(second.dif.true(r)))
  temp<-lapply(FUN=bias, X=second.dif.list, true=true.temp)
  second.dif.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  second.dif.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  second.dif.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  # which of the simple estimators got closer for the second difference?
  bias.na<-function(measured, true){
    return(measured-true)
  }
  true.temp<-matrix(data=second.dif.true(r), byrow=T, nrow=reps, ncol=length(second.dif.true(r)))
  temp<-lapply(FUN=bias.na, X=second.dif.list, true=true.temp)
  temp.abs<-lapply(FUN=abs, X=temp)
  
  second.dif.which.closer[r,1]<-sum(1==apply(X=cbind(temp.abs[[1]][,1], temp.abs[[2]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[1]][,1], temp.abs[[2]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,2]<-sum(1==apply(X=cbind(temp.abs[[3]][,1], temp.abs[[4]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[3]][,1], temp.abs[[4]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,3]<-sum(1==apply(X=cbind(temp.abs[[5]][,1], temp.abs[[6]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[5]][,1], temp.abs[[6]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,4]<-sum(1==apply(X=cbind(temp.abs[[7]][,1], temp.abs[[8]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[7]][,1], temp.abs[[8]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,5]<-sum(1==apply(X=cbind(temp.abs[[9]][,1], temp.abs[[10]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[9]][,1], temp.abs[[10]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,6]<-sum(1==apply(X=cbind(temp.abs[[11]][,1], temp.abs[[12]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[11]][,1], temp.abs[[12]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,7]<-sum(1==apply(X=cbind(temp.abs[[13]][,1], temp.abs[[14]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[13]][,1], temp.abs[[14]][,1]), FUN=which.min, MARGIN=1))))

  
}




# Post-processing and analysis

# reference indexes for each type of DGP
continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115

linear.additive<-1:24
linear.secdif.10<-50:53
linear.secdif.20<-62:65
linear.secdif.30<-74:79
linear.secdif.40<-92:103
linear.secdif.50<-104:107

nonlinear.additive<-25:49
nonlinear.secdif.10<-54:61
nonlinear.secdif.20<-66:73
nonlinear.secdif.30<-80:91
nonlinear.secdif.40<-96:103
nonlinear.secdif.50<-108:115

additive.null<-1:9
additive.x.null<-10:14
additive.nonull<-15:24
nonlinear.x.null<-25:39
nonlinear.nonull<-40:49
additive.x.null.two<-c(10:14,25:39)
additive.nonull.two<-c(15:24,40:49)

analysis.titles<-c("additive", "min-max second difference = .10", "min-max second difference = .20", "min-max second difference = .30", "min-max second difference = .40", "min-max second difference = .50", "linear additive", "linear min-max second difference = .10", "linear min-max second difference = .20", "linear min-max second difference = .30", "linear min-max second difference = .40", "linear min-max second difference = .50", "nonlinear additive", "nonlinear min-max second difference = .10", "nonlinear min-max second difference = .20", "nonlinear min-max second difference = .30", "nonlinear min-max second difference = .40", "nonlinear min-max second difference = .50", "additive null", "additive x null z not", "additive no null", "nonlinear x null z not", "nonlinear no null", "additive x null z not expanded", "additive no null expanded")

analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50, linear.additive, linear.secdif.10, linear.secdif.20, linear.secdif.30, linear.secdif.40, linear.secdif.50, nonlinear.additive, nonlinear.secdif.10, nonlinear.secdif.20, nonlinear.secdif.30, nonlinear.secdif.40, nonlinear.secdif.50, additive.null, additive.x.null, additive.nonull, nonlinear.x.null, nonlinear.nonull, additive.x.null.two, additive.nonull.two)

names(analysis.indices)<-analysis.titles


# now print out all the analysis!

# First differences
x1.1a<-c(rep(1, 2), rep(.75, 2), seq(from=0,to=1,by=0.05))
x2.1a<-c(0,1,0,1, rep(1, 21))
x1.2a<-c(rep(0, 2), rep(.25, 2), seq(from=0,to=1,by=0.05))
x2.2a<-c(0,1,0,1, rep(0, 21))


fd.mat<-cbind(x1.1a,x2.1a,NA,x1.2a,x2.2a)
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  fd.mat.temp<-cbind(apply(X=fd.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps)
  colnames(fd.mat.temp)<-result.labels
  fd.mat<-cbind(fd.mat, fd.mat.temp)
  
}

write.csv(file="D_first_difference_results.csv", t(fd.mat))




# Central first differences
central.fd.mat<-matrix(data=NA, ncol=0, nrow=dim(central.fd.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  central.fd.mat.temp<-cbind(apply(X=central.fd.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps)
  colnames(central.fd.mat.temp)<-result.labels
  central.fd.mat<-cbind(central.fd.mat, central.fd.mat.temp)
  
}
write.csv(file="D_central_fd_results.csv", t(central.fd.mat))



# Marginal effects
me.mat<-matrix(data=NA, ncol=0, nrow=dim(me.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  me.mat.temp<-cbind(apply(X=me.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps)
  colnames(me.mat.temp)<-result.labels
  me.mat<-cbind(me.mat, me.mat.temp)
  
}
write.csv(file="D_me_results.csv", t(me.mat))




# Marginal effects differences
me.dif.mat<-matrix(data=NA, ncol=0, nrow=dim(me.dif.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  me.dif.mat.temp<-cbind(apply(X=me.dif.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps)
  colnames(me.dif.mat.temp)<-result.labels
  me.dif.mat<-cbind(me.dif.mat, me.dif.mat.temp)
  
}
write.csv(file="D_me_dif_results.csv", t(me.dif.mat))





# Second differences
second.dif.mat<-matrix(data=NA, ncol=0, nrow=dim(second.dif.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  second.dif.mat.temp<-cbind(apply(X=second.dif.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps)
  colnames(second.dif.mat.temp)<-result.labels
  rownames(second.dif.mat.temp)<-c("2nddif_1_0", "2nddif_0.75_0.25")
  second.dif.mat<-cbind(second.dif.mat, second.dif.mat.temp)
  
}
write.csv(file="D_second_difference_results.csv", t(second.dif.mat))






# how often was each model selected?
selection.counts<-array(data=NA, dim=c(155,length(model.name.vector.o),length(selection.model.names)), dimnames=c(NULL, model.name.vector.o, selection.model.names))
for(k in 1:length(selection.model.names)){
  
  for(i in 1:155){
    
    for(j in 1:length(model.name.vector.o)){
      
      selection.counts[i,j,k]<-length(which(model.name.vector.o[j]==selected.models.store[i,,k]))
      
    }
    
  }
  
}


for(i in 1:length(analysis.indices)){
	selection.counts.temp<-matrix(data=0, nrow=length(model.name.vector.o), ncol=length(selection.model.names))
	for(m in 1:length(analysis.indices[[i]])){
		selection.counts.temp<-selection.counts.temp+selection.counts[analysis.indices[[i]][m],,]
	}
	colnames(selection.counts.temp)<-selection.model.names
	rownames(selection.counts.temp)<-model.name.vector.o
	write.csv(file=paste("Dz_",analysis.titles[i],"_selected_models.csv", sep=""), selection.counts.temp)  
}




# which model (product or no product) was closer to the true second difference?
names(second.dif.which.closer)<-c("logit", "cloglog", "log", "probit", "regression", "loglog", "logc")

second.dif.which.closer.summary<-matrix(data=NA, ncol=7, nrow=length(analysis.indices))
names(second.dif.which.closer.summary)<-c("logit", "cloglog", "log", "probit", "regression", "loglog", "logc")

rownames(second.dif.which.closer.summary)<-analysis.titles
colnames(second.dif.which.closer.summary)<-c("logit", "cloglog", "log", "probit", "regression", "loglog", "logc")
for(i in 1:length(analysis.indices)){

    second.dif.which.closer.summary[i,]<-colMeans(second.dif.which.closer[analysis.indices[[i]],])
  
}

write.csv(file="D_pro_nopro_second_dif_comparison.csv", second.dif.which.closer)
write.csv(file="D_pro_nopro_grouped_second_dif_comparison.csv", second.dif.which.closer.summary)


# how often did each model converge?
write.csv(file="D_convergence.report.csv", convergence.report.mat)




stopCluster(cl)


save.image("2015-3-9_dichotomous-sim-results.RData")











###################################################################
#
# Part Three: analysis of model results on Monte Carlo data sets
# 
###################################################################

rm(list=ls())
load("2015-3-9_dichotomous-sim-results.RData")
library(ggplot2)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115
analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50)
analysis.names<-c("continuous additive", "second difference = 0.1", "second difference = 0.2", "second difference = 0.3", "second difference = 0.4", "second difference = 0.5")

# make a "pretty" model name vector
model.name.pretty <- model.name.vector
target.models<-c(1,2,15,7,8,18,9,10,19,25,24,35,34)
model.name.pretty[target.models]<-c("2A: logit basic", "2B: logit product", "2C: logit X2", "3A: probit basic", "3B: probit product", "3C: probit X2", "1A: regression basic", "1B: regression product", "1C: regression X2", "8: AIC", "9: BIC", "16: logit significance", "17: probit significance")






# display the confidence interval performance 
ci.plot.data<-t(second.dif.ci.count[1:115,1,][,target.models])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.add<-t(second.dif.ci.count[analysis.indices[[1]],1,][,target.models])
ci.plot.vector.add<-round(100*apply(X=ci.plot.data.add, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.weak<-t(second.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,target.models])
ci.plot.vector.weak<-round(100*apply(X=ci.plot.data.weak, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.strong<-t(second.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,target.models])
ci.plot.vector.strong<-round(100*apply(X=ci.plot.data.strong, FUN=mean, MARGIN=1)/25, 1)
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot.vector, ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong, group=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]))

groups<-rep(c("additive", "weak interaction", "strong interaction"), each=length(target.models))
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot = c(ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong), model.name=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]), groups=factor(groups, levels=c("additive", "weak interaction", "strong interaction")))
p<-ggplot(df)
p+geom_point(aes(ci.plot, model.name, shape=groups), size=3)+ xlab("% of Data Sets with 95% Confidence Interval Containing True Value") + ylab("Estimation Model") + theme(strip.text.y = element_text(angle=0, size=16) )+ theme(axis.text = element_text(size=16, color="black"), axis.title = element_text(size=16), legend.text = element_text(size=16), legend.title = element_text(size=16))+geom_hline(yintercept=7.5)+scale_shape_discrete(name="DGP Category")
# Figure S-3 (a): second difference CI performance
dev.copy2pdf(file="conf_interval_performance_d.pdf")

best.ind <- which(ci.plot.vector.add>=90 & ci.plot.vector.weak>=90 & ci.plot.vector.strong>=90)
best.ind <- which(model.name.pretty %in% model.name.pretty[target.models][best.ind])

# second difference abs bias levels
# Additive Models (DICHOTOMOUS)
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
ci.plot.data<-t(second.dif.ci.count[analysis.indices[[1]],1,])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
#best.ind<-which((ci.plot.vector)>92.5 & ci.plot.vector<97.5)
#best.ind<-best.ind[which(best.ind %in% target.models)] # excludes the bad roc/hl models
ci.plot.vector<-ci.plot.vector[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
additive.plot.data<-t(second.dif.abs.bias[analysis.indices[[1]],1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])

df<-data.frame(additive.plot.vector, group=rep(group.label, 49))

additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# second difference abs bias
# Strongly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# now the best CI models
ci.plot.data<-t(second.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),1,])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
#best.ind<-which((ci.plot.vector)>92.5 & ci.plot.vector<97.5)
#best.ind<-best.ind[which(best.ind %in% target.models)] # excludes the bad roc/hl models
ci.plot.vector<-ci.plot.vector[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(second.dif.abs.bias[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])

df<-data.frame(additive.plot.vector, group=rep(group.label, 24))

additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label



# second difference Abs bias
# Weakly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
ci.plot.data<-t(second.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
#best.ind<-which((ci.plot.vector)>92.5 & ci.plot.vector<97.5)
#best.ind<-best.ind[which(best.ind %in% target.models)] # excludes the bad roc/hl models
ci.plot.vector<-ci.plot.vector[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(second.dif.abs.bias[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])

df<-data.frame(additive.plot.vector, group=rep(group.label, 42))

additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label



# create a comprehensive absolute bias plot for second differences
# start by ordering the elements
additive.plot.data<-t(second.dif.abs.bias[1:115,1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.01, 0.16)) +  ylab("Avg. Absolute Error in Estimate of Min-Max Second Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0)) + theme(strip.text.y = element_text(angle=0, size=16) )+ theme(axis.text = element_text(size=16, color="black"), axis.title = element_text(size=16), legend.text = element_text(size=16), legend.title = element_text(size=16))
# Figure S-4(a)
dev.copy2pdf(file="second-difference-bias-d.pdf")






# second difference CI widths
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.ci.width.mean[analysis.indices[[1]],1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label


# second difference CI Widths
# Strongly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# now the best CI models
additive.plot.data<-t(second.dif.ci.width.mean[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label


# second difference CI widths
# Weakly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.ci.width.mean[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label



# Auxiliary plot (not used in paper) 
# Width of 95% confidence intervals for min-max second difference
# create a comprehensive absolute bias plot for second differences
# start by ordering the elements
additive.plot.data<-t(second.dif.ci.width.mean[1:115,1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(0.2, 0.45)) +  ylab("Mean CI Width of Second Difference Estimate (by DGP)") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

dev.copy2pdf(file="second-difference-ci-width-D.pdf")




rm(list=ls())
load("2015-3-9_dichotomous-sim-results.RData")
library(ggplot2)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115
analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50)
analysis.names<-c("continuous additive", "second difference = 0.1", "second difference = 0.2", "second difference = 0.3", "second difference = 0.4", "second difference = 0.5")

# make a "pretty" model name vector
model.name.pretty <- model.name.vector
target.models<-c(1,2,15,7,8,18,9,10,19,25,24,35,34)
model.name.pretty[target.models]<-c("2A: logit basic", "2B: logit product", "2C: logit X2", "3A: probit basic", "3B: probit product", "3C: probit X2", "1A: regression basic", "1B: regression product", "1C: regression X2", "8: AIC", "9: BIC", "16: logit significance", "17: probit significance")


# Marginal Effect abs bias levels
# display the confidence interval performance 
ci.plot.data<-me.dif.ci.count[1:115,c(1,2),target.models]
ci.plot.data<-aperm(ci.plot.data, c(3,1,2))
dim(ci.plot.data)<-c(13,115*2)
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.strong<-me.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),c(1,2),target.models]
ci.plot.data.strong<-aperm(ci.plot.data.strong, c(3,1,2))
dim(ci.plot.data.strong)<-c(13,24*2)
ci.plot.vector.strong<-round(100*apply(X=ci.plot.data.strong, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.weak<-me.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),c(1,2),target.models]
ci.plot.data.weak<-aperm(ci.plot.data.weak, c(3,1,2))
dim(ci.plot.data.weak)<-c(13,42*2)
ci.plot.vector.weak<-round(100*apply(X=ci.plot.data.weak, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.add<-me.dif.ci.count[c(analysis.indices[[1]]),c(1,2),target.models]
ci.plot.data.add<-aperm(ci.plot.data.add, c(3,1,2))
dim(ci.plot.data.add)<-c(13,49*2)
ci.plot.vector.add<-round(100*apply(X=ci.plot.data.add, FUN=mean, MARGIN=1)/25, 1)

o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot.vector, ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong, group=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]))

groups<-rep(c("additive", "weak interaction", "strong interaction"), each=length(target.models))
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot = c(ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong), model.name=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]), groups=factor(groups, levels=c("additive", "weak interaction", "strong interaction")))
p<-ggplot(df)
p+geom_point(aes(ci.plot, model.name, shape=groups), size=3)+ xlab("% of True Values in 95% Confidence Interval") + ylab("Model Specification")+ theme(strip.text.y = element_text(angle=0, size=16) )+ theme(axis.text = element_text(size=16, color="black"), axis.title = element_text(size=16), legend.text = element_text(size=16), legend.title = element_text(size=16))+geom_hline(yintercept=7.5)+scale_shape_discrete(name="DGP Category")
# Figure S-3(b)
dev.copy2pdf(file="me_conf_interval_performance_d.pdf")

best.ind<-which(ci.plot.vector.add>=90 & ci.plot.vector.weak>=90 & ci.plot.vector.strong>=90)
best.ind <- which(model.name.pretty %in% model.name.pretty[target.models][best.ind])



# Strongly Interactive Models
ci.plot.vector<-ci.plot.vector.strong[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(me.dif.abs.bias[analysis.indices[[5]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[5]],2,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[6]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[6]],2,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])

df<-data.frame(additive.plot.vector, group=rep(group.label, 24))
textdat<-data.frame(ci.label.vector, group=group.label, y_pos=rep(0.30, 24))

additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label





# me difference abs bias levels
# Moderately Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# the non-adaptive models

ci.plot.vector<-ci.plot.vector.weak[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(me.dif.abs.bias[analysis.indices[[2]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[2]],2,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[3]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[3]],2,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[4]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[4]],2,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
colorlabel<-ifelse(best.ind[o]>42, "darkred", "black")

df<-data.frame(additive.plot.vector, group=rep(group.label, 84))
textdat<-data.frame(ci.label.vector, group=group.label, y_pos=rep(0.25, 24))

additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# me difference abs bias levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

ci.plot.vector<-ci.plot.vector.add[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(me.dif.abs.bias[analysis.indices[[1]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[1]],2,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])

df<-data.frame(additive.plot.vector, group=rep(group.label, 98))
textdat<-data.frame(ci.label.vector, group=group.label, y_pos=rep(0.25, 24))

additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# comprehensive plot
# first, order the elements
additive.plot.data<-t(me.dif.abs.bias[1:115,1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[,2,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.04, 0.21)) +  ylab("Avg. Absolute Error in Estimate of Min-Max Marg. Effect Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0)) + theme(strip.text.y = element_text(angle=0, size=16) )+ theme(axis.text = element_text(size=16, color="black"), axis.title = element_text(size=16), legend.text = element_text(size=16), legend.title = element_text(size=16))
# Figure S-4(b)
dev.copy2pdf(file="me-diff-bias-d.pdf")



###########################################
# 
# Part Four: Size/Power Analysis
#
###########################################


###########################################
# size/power analysis for dichotomous DGPs
# N = 1000
###########################################

rm(list=ls())
set.seed(123456)
setwd("J:/Dropbox/jesarey_documents/BRE Logit Misspecification Paper/Final Replication Package")
library(foreign)
library(mvtnorm)

significance<-function(mu, sigma){
  
  mat.1<-matrix(dat=c(1,1,0,0))
  mat.2<-matrix(dat=c(1,0,0,0))
  mat.3<-matrix(dat=c(1,1,1,1))
  mat.4<-matrix(dat=c(1,0,1,0))
  
  require(mvtnorm)
  samples<-rmvnorm(1000, mu, sigma)
  effect.1<-samples%*%mat.1
  effect.2<-samples%*%mat.2
  effect.3<-samples%*%mat.3
  effect.4<-samples%*%mat.4
  
  dif.low<-effect.1-effect.2
  dif.high<-effect.3-effect.4
  
  dif.dist<-dif.high-dif.low
  dif.quant<-quantile(dif.dist, probs=c(0.975, 0.025))
  
  ifelse(sign(dif.quant[1])==sign(dif.quant[2]), return(1), return(0))
  
}

sig.mat<-matrix(data=NA, nrow=115, ncol=100)
sig.product<-matrix(data=NA, nrow=115, ncol=100)

pb<-txtProgressBar(initial=0, min=1, max=115, style=3)
for(i in 1:115){
  
  setTxtProgressBar(pb, value=i)
  if(i<10){master.dat<-read.dta(file=paste("sim0",i,"D.dta", sep = ""))
  }else{master.dat<-read.dta(file=paste("sim",i,"D.dta", sep = ""))}
  
  
  for(j in 1:100){
    
    baby.dat<-master.dat[(1+1000*(j-1)):(1000*j),]
    model<-glm(y~x1+x2+x1x2, family=binomial(link=probit), data=baby.dat)
    sig.product[i,j]<-ifelse(coefficients(summary(model))[4,4]<0.05, 1, 0)
    
    sig.mat[i,j]<-significance(coefficients(model), vcov(model))
    
  }
  
  
}
close(pb)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115


power.product<-c()

power.product[1]<-100*mean(apply(X=sig.product[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.product[2]<-100*mean(apply(X=sig.product[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.product[3]<-100*mean(apply(X=sig.product[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.product[4]<-100*mean(apply(X=sig.product[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.product[5]<-100*mean(apply(X=sig.product[min.max.secdif.50,], FUN=mean, MARGIN=1))


par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.product~second.difference, type="l", ylim=c(-3, 102), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which the Product Term is Stat. Sig.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)
# Figure S-5(b)
dev.copy2pdf(file="product-power-D.pdf")

product.size<-100*apply(X=sig.product[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(product.size, xlab="", ylab="% of Data Sets in which the Product Term is Stat. Sig.", cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)
# Figure S-5(a)
dev.copy2pdf(file="product-size-D.pdf")






# illustrate model dependence in the dichotomous DGPs
library(foreign)
setwd("J:/Dropbox/jesarey_documents/BRE Logit Misspecification Paper/Final Replication Package")
rm(list=ls())
set.seed(123456)
source(file="trupry_2013-8-18.r")

# Plot bias in the predictions of the marginal effect,
# dPr(y)/dx

me<-function(x1,x2,dgp,mod){
  
  x1.1<-x1+0.01 
  x1.2<-x1-0.01
  x2.1<-x2
  x2.2<-x2
  
  pred.dat.1<-data.frame(x1=x1.1, x2=x2.1, x1x2=x1.1*x2.1, x1sq=x1.1^2, x2sq=x2.1^2 ,x1sqx2=(x1.1^2)*x2.1, x2sqx1=(x2.1^2)*x1.1, x1sqx2sq=(x1.1^2)*(x2.1^2))
  est.1<-predict(mod, newdata=pred.dat.1, type="response")
  
  pred.dat.2<-data.frame(x1=x1.2, x2=x2.2, x1x2=x1.2*x2.2, x1sq=x1.2^2, x2sq=x2.2^2 ,x1sqx2=(x1.2^2)*x2.2, x2sqx1=(x2.2^2)*x1.2, x1sqx2sq=(x1.2^2)*(x2.2^2))
  est.2<-predict(mod, newdata=pred.dat.2, type="response")
  
  est.me<-(est.1-est.2)/0.02
  true.me<-as.vector((trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))/0.02)
  
  return(c(est.me-true.me))
  
}

x1.plot<-seq(0, 1, 0.1)
me.bias.0.mat<-matrix(data=NA, nrow=115, ncol=length(x1.plot))
me.bias.1.mat<-matrix(data=NA, nrow=115, ncol=length(x1.plot))

pb<-txtProgressBar(min=1, max=115, initial=0, style=3)
for(j in 1:115){
  
  setTxtProgressBar(pb, value=j)
  if(j<10){dat<-read.dta(file=paste("sim0",j,"D.dta", sep = ""))}
  else{dat<-read.dta(file=paste("sim",j,"D.dta", sep = ""))}
  glm.mod<-glm(y~x1+x2+x1x2+x1sq+x1sqx2, family=binomial(link="logit"), data=dat)
  for(i in 1:length(x1.plot)){
    me.bias.0.mat[j,i]<-me(x1.plot[i], 0, j, glm.mod)
    me.bias.1.mat[j,i]<-me(x1.plot[i], 1, j, glm.mod)
  }
  
  
}
close(pb)



continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115
analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50)
#analysis.names<-c("continuous additive", "second difference = 0.1", "second difference = 0.2", "second difference = 0.3", "second difference = 0.4", "second difference = 0.5")
analysis.names<-c("additive", expression(abs(Delta*Delta["min-max"]) == 0.1), expression(abs(Delta*Delta["min-max"]) == 0.2), expression(abs(Delta*Delta["min-max"]) == 0.3), expression(abs(Delta*Delta["min-max"]) == 0.4), expression(abs(Delta*Delta["min-max"]) == 0.5))

me.bias.vec<-as.vector(rbind(me.bias.0.mat, me.bias.1.mat))
dgp<-rep(1:115, length(x1.plot))
loc<-factor(paste("X = ", rep(x1.plot, each=115)), levels=paste( "X = ", x1.plot))

dgp.type<-c()
for(i in 1:length(analysis.indices)){
  dgp.type[which(dgp%in%analysis.indices[[i]])]<-analysis.names[i]
}
dgp.type<-factor(dgp.type, levels=analysis.names)
ylabs <- parse(text=analysis.names)

library(ggplot2)
library(grid)
df<-data.frame(me.bias.vec, dgp=factor(dgp, levels=1:115), loc=loc, dgp.type=dgp.type)
p<-ggplot(df)
p <- p + geom_jitter(aes(me.bias.vec, dgp.type, colour = dgp.type)) + scale_y_discrete(labels=ylabs)  + facet_grid(loc~.) + ylab("DGP Type") + xlab("Bias in Marginal Effect Estimate") + theme(legend.position = "none", axis.title.y = element_text(vjust=3.2), axis.title.x = element_text(vjust=0.1), strip.text.y = element_text(angle=270, size=14) )+ theme(axis.text = element_text(size=12, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14)) + scale_colour_grey(start = 0.6, end = 0.2) + theme(plot.margin = unit(c(0.2,1,1,1), "cm")) 
suppressWarnings(print(p))

# Figure S-6
dev.copy2pdf(file="asymptotic_me_bias_D.pdf")
